# Scrapy settings for the taobao project
#
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#
# Package that holds this project's spiders. The same package is used both
# as the place new spiders are generated into (NEWSPIDER_MODULE) and as the
# only entry in the list of modules searched for spiders (SPIDER_MODULES).
NEWSPIDER_MODULE = "taobao.spiders"
SPIDER_MODULES = [NEWSPIDER_MODULE]

# Set the User-Agent
# USER_AGENT = "scrapy-redis (+https://github.com/rolando/scrapy-redis)"

# Set the duplicate-request filter class; this one comes from the
# scrapy_redis package.
DUPEFILTER_CLASS = "scrapy_redis.dupefilter.RFPDupeFilter"

# Set the scheduler. The scheduler provided by scrapy_redis is able to
# interact with the (Redis) database.
SCHEDULER = "scrapy_redis.scheduler.Scheduler"

# Whether to keep the de-duplication set and the task queue in the Redis
# database when the spider finishes. True keeps them.
SCHEDULER_PERSIST = True

# SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.SpiderPriorityQueue"
# SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.SpiderQueue"
# SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.SpiderStack"

# Item pipelines to enable, mapped to their order value. Per the Scrapy
# documentation, items pass through pipelines from the lowest value to the
# highest, so TaobaoPipeline (300) runs before RedisPipeline (400).
ITEM_PIPELINES = {
    "taobao.pipelines.TaobaoPipeline": 300,
    # When this pipeline is enabled, it saves the scraped data to the Redis
    # database.
    # NOTE(review): the path points at taobao.pipelines, not at
    # scrapy_redis.pipelines.RedisPipeline -- confirm that the project's
    # pipelines module really defines/exports RedisPipeline.
    "taobao.pipelines.RedisPipeline": 400,
}

# Connection URL of the Redis database.
# NOTE(review): this is a hard-coded address in a private (172.16.0.0/12)
# range -- confirm it is correct for every environment this project runs in.
REDIS_URL = "redis://172.16.123.223:6379"

# LOG_LEVEL = "DEBUG"

# Introduce an artificial delay to make use of parallelism to speed up the
# crawl.

# Download delay (in seconds, per the Scrapy documentation).
DOWNLOAD_DELAY = 1
